This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button, a document will be generated that includes both the content and the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
# Print summary statistics for the built-in `cars` dataset.
# The `##` lines below are the knitted output of this call.
summary(cars)
##      speed           dist
##  Min.   : 4.0   Min.   :  2.00
##  1st Qu.:12.0   1st Qu.: 26.00
##  Median :15.0   Median : 36.00
##  Mean   :15.4   Mean   : 42.98
##  3rd Qu.:19.0   3rd Qu.: 56.00
##  Max.   :25.0   Max.   :120.00
You can also embed plots, for example:
Note that the `echo = FALSE` parameter was added to the code chunk to prevent printing of the R code that generated the plot.
library(gganimate)## Loading required package: ggplot2
library(tidyverse)## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ tibble 3.1.0 ✓ dplyr 1.0.5
## ✓ tidyr 1.1.2 ✓ stringr 1.4.0
## ✓ readr 1.4.0 ✓ forcats 0.5.0
## ✓ purrr 0.3.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
# Script settings ----
# Build the animated plot only when this flag is TRUE.
animated <- TRUE
# Axes to draw on a log10 scale: any string containing "x" and/or "y".
log <- "x"
# Load the student-teacher ratio data from the TidyTuesday repository
# (2019-05-07 release). The `##` lines that follow are the knitted output.
student_ratio <- readr::read_csv(
  file = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2019/2019-05-07/student_teacher_ratio.csv"
)
## ── Column specification ────────────────────────────────────────────────────────
## cols(
## edulit_ind = col_character(),
## indicator = col_character(),
## country_code = col_character(),
## country = col_character(),
## year = col_double(),
## student_ratio = col_double(),
## flag_codes = col_character(),
## flags = col_character()
## )
# Load annual GDP per capita, sourced from the World Bank
# (https://data.worldbank.org/indicator/ny.gdp.pcap.cd?end=2017&start=1960).
# The file is tab-delimited; the `##` lines that follow are the knitted output.
Annual_GDP <- readr::read_delim(
  file = "https://raw.githubusercontent.com/LiamDBailey/TidyTuesday/master/inst/extdata/GDP_data.csv",
  delim = "\t"
)
## ── Column specification ────────────────────────────────────────────────────────
## cols(
## .default = col_double(),
## `Country Name` = col_character(),
## `Country Code` = col_character(),
## `Indicator Name` = col_character(),
## `Indicator Code` = col_character(),
## `2018` = col_logical()
## )
## ℹ Use `spec()` for the full column specifications.
# Reshape the wide GDP table (one column per year) into long format with one
# row per country and year, so it can be joined to the student-ratio data.
#
# Result columns: country_GDP (country name), country_code, year (integer),
# GDP. Country-years with no GDP value are dropped.
Reshape_GDP <- Annual_GDP %>%
  # Drop the indicator metadata by name rather than by position, so a change
  # in column order upstream cannot silently remove the wrong columns.
  select(-c(`Indicator Name`, `Indicator Code`)) %>%
  # Rename columns to make them correspond between the two datasets.
  rename(country_GDP = `Country Name`, country_code = `Country Code`) %>%
  # Every remaining column is a year; stack them into year/GDP pairs.
  # tidyr is already attached via tidyverse, so reshape2 is not needed.
  tidyr::pivot_longer(
    cols = -c(country_GDP, country_code),
    names_to = "year",
    values_to = "GDP",
    values_drop_na = TRUE
  ) %>%
  # Year labels arrive as column names (character); store them as integers.
  mutate(year = as.integer(year))
# Combine primary-education student ratios with GDP, keeping only the
# country-years for which a GDP value is available.
student_ratio_GDP <- student_ratio %>%
  # Just look at primary education.
  filter(indicator == "Primary Education") %>%
  # An inner join keeps only rows whose country code and year both appear in
  # the GDP table.
  inner_join(Reshape_GDP, by = c("country_code", "year")) %>%
  # Exclude any matched rows that still carry no GDP value.
  filter(!is.na(GDP)) %>%
  # Store year as an integer.
  mutate(year = as.integer(year))
# Build (and print) an animated scatter plot of primary student-teacher ratio
# against GDP per capita, one frame per year, labelling the two countries with
# the lowest ("best") and highest ("worst") mean ratio.
#
# Reads:  animated, log, student_ratio_GDP.
# Writes: ratio_ranking, best, worst, animated_plot, and the global option
#         gganimate.dev_args.
if (animated) {
  # Rank countries by mean ratio once and reuse the ranking for both ends.
  # Missing ratios are dropped before counting: otherwise a country with any
  # NA ratio gets an NA mean, which arrange() sorts last, so it would be
  # labelled as "worst". Only countries with all 6 observations are ranked.
  ratio_ranking <- student_ratio_GDP %>%
    filter(!is.na(student_ratio)) %>%
    group_by(country_code) %>%
    summarise(mean_ratio = mean(student_ratio), n = n()) %>%
    ungroup() %>%
    filter(n == 6) %>%
    arrange(mean_ratio)

  # Lowest two mean ratios (best) and highest two (worst).
  best <- slice_head(ratio_ranking, n = 2)
  worst <- slice_tail(ratio_ranking, n = 2)

  animated_plot <- ggplot() +
    # Layer 1: one point per country-year, sized by GDP.
    geom_point(
      data = student_ratio_GDP,
      aes(x = GDP, y = student_ratio, colour = country_code, size = GDP),
      alpha = 0.7
    ) +
    # Layer 2: labels for the best countries, nudged upwards.
    ggrepel::geom_label_repel(
      data = filter(student_ratio_GDP, country_code %in% best$country_code),
      aes(x = GDP, y = student_ratio, label = country),
      nudge_y = 5, segment.size = 0.5, family = "Ubuntu", size = 6
    ) +
    # Layer 3: labels for the worst countries, nudged to the right.
    ggrepel::geom_label_repel(
      data = filter(student_ratio_GDP, country_code %in% worst$country_code),
      aes(x = GDP, y = student_ratio, label = country),
      nudge_x = 1, segment.size = 0.5, family = "Ubuntu", size = 6
    ) +
    scale_colour_viridis_d() +
    labs(
      caption = "\nVisualisation by @ldbailey255 | GDP data: data.worldbank.org | Student ratio data: UNESCO",
      y = "Primary student-teacher ratio",
      x = "GDP per capita"
    ) +
    scale_y_continuous(limits = c(0, NA)) +
    scale_size_continuous(range = c(3, 10)) +
    theme_classic() +
    theme(
      title = element_text(family = "Ubuntu", colour = "black", size = 16, margin = margin(t = 10)),
      axis.text = element_text(family = "Ubuntu", size = 15, colour = "black"),
      axis.title.y = element_text(family = "Ubuntu", size = 18, colour = "black", margin = margin(r = 10)),
      axis.title.x = element_text(family = "Ubuntu", size = 18, colour = "black", margin = margin(t = 10)),
      legend.position = "none"
    ) +
    # Start gganimate code: one frame per year, with the year in the title.
    gganimate::transition_time(time = year) +
    labs(title = "Year: {frame_time}") +
    # Leave faded marks for past points only; the label layers (2 and 3) are
    # excluded so old labels do not pile up.
    gganimate::shadow_mark(alpha = 0.25, wrap = FALSE, size = 2, exclude_layer = 2:3) +
    gganimate::ease_aes("linear")

  # Optionally switch either axis to a log10 scale, as requested in `log`.
  if (stringr::str_detect(log, "x")) {
    animated_plot <- animated_plot +
      scale_x_log10()
  }

  if (stringr::str_detect(log, "y")) {
    animated_plot <- animated_plot +
      scale_y_log10()
  }

  # Size of the rendering device used by gganimate.
  options(gganimate.dev_args = list(width = 600, height = 520))

  # gganimate::anim_save("./plots/07_05_19.gif", animation = animated_plot)
  #
  # return(animated_plot)

  animated_plot
}